Developer CD Series 2000 November: Tool Chest

home *** CD-ROM | disk | FTP | other *** search

/ Developer CD Series 2000 November: Tool Chest / Dev.CD Nov 00 TC Disk 1.toast / Sample Code / Contributed / SpriteWorld / SpriteWorld Files / BlitPixie / Sources / BlitPixieDoubled.c < prev next >

Wrap

Text File | 2000-10-06 | 11.7 KB | 427 lines | [TEXT/CWIE]

///-------------------------------------------------------------------------------------- // BlitPixieDoubled // a fast pixel-doubling blitter // // written by Anders F Björklund <afb@algonet.se> // ©1999 afb. ///-------------------------------------------------------------------------------------- #ifndef __BLITPIXIE__ #include "BlitPixieHeader.h" #endif #include "BlitPixieAsm.h" #pragma mark *** PowerPC asm: #if USE_PPC_ASSEMBLY ASM_FUNC void BlitPixieDoubled8Bit( register unsigned char *source, register unsigned char *destination, register unsigned long srcRowBytes, register unsigned long dstRowBytes, register unsigned short width, register unsigned short height) { #define r_src r3 #define r_dst r4 #define r_srcRowBytes r5 #define r_dstRowBytes r6 #define r_width r7 #define r_height r8 #define r_dst2 r9 #define r_temp1 r10 #define r_temp2 r11 ASM_BEGIN subi r_dst,r_dst,8 add r_dst2,r_dst,r_dstRowBytes sub r_srcRowBytes,r_srcRowBytes,r_width // subtract width from rowbytes for stride sub r_dstRowBytes,r_dstRowBytes,r_width // subtract width from rowbytes for stride add r_dstRowBytes,r_dstRowBytes,r_dstRowBytes rlwinm r_width,r_width,32-2,2,31 subi r_src,r_src,4 subi r_width,r_width,1 @rowloop: lwzu r0,4(r_src) // load 4 pixels into r0 /* Pixel building process: r0: ABCD r10: A*** AAB* AABB AABB**** r11: ***D *CDD CCDD ****CCDD fp0: AABBCCDD */ mr r10,r0 // put copy in r10 mr r11,r0 // and in r11 rlwimi r10,r0,24,8,23 // copy upper 16 bits to middle of r10 rlwimi r11,r0,8,8,23 // copy lower 16 bits to middle of r11 rlwimi r10,r0,16,24,31 // get remaining bits into r10 stw r10,-8(SP) // store upper 4 pixels into part of double rlwimi r11,r0,16,0,7 // get remaining bits into r11 stw r11,-4(SP) // store lower 4 pixels into part of double mtctr r_width // copy width into counter lfd fp0,-8(SP) // load double @loop: lwzu r0,4(r_src) // load 4 pixels into r10 stfdu fp0,8(r_dst) // store a double from before // (same as above) mr r10,r0 // put copy in r10 mr r11,r0 // and in r11 rlwimi r10,r0,24,8,23 // copy upper 16 bits to middle of r10 rlwimi r11,r0,8,8,23 // copy lower 16 bits to middle of r11 rlwimi r10,r0,16,24,31 // get remaining bits into r10 stw r10,-8(SP) // store upper 4 pixels into part of double rlwimi r11,r0,16,0,7 // get remaining bits into r11 stw r11,-4(SP) // store lower 4 pixels into part of double stfdu fp0,8(r_dst2) // store a double from before lfd fp0,-8(SP) // load double bdnz+ @loop // loop over all x stfdu fp0,8(r_dst) subic. r_height,r_height,1 add r_src,r_src,r_srcRowBytes add r_dst,r_dst,r_dstRowBytes stfdu fp0,8(r_dst2) // store a double from before add r_dst2,r_dst2,r_dstRowBytes bne @rowloop // loop over all y ASM_END } ASM_FUNC void BlitPixieDoubled16Bit( register unsigned short *source, register unsigned short *destination, register unsigned long srcRowBytes, register unsigned long dstRowBytes, register unsigned short width, register unsigned short height) { #define r_src r3 #define r_dst r4 #define r_srcRowBytes r5 #define r_dstRowBytes r6 #define r_width r7 #define r_height r8 #define r_dst2 r9 #define r_temp1 r10 #define r_temp2 r11 ASM_BEGIN subi r_dst,r_dst,8 add r_dst2,r_dst,r_dstRowBytes sub r_srcRowBytes,r_srcRowBytes,r_width // subtract 2*width from rowbytes for stride sub r_srcRowBytes,r_srcRowBytes,r_width sub r_dstRowBytes,r_dstRowBytes,r_width // subtract 2*width from rowbytes for stride sub r_srcRowBytes,r_srcRowBytes,r_width add r_dstRowBytes,r_dstRowBytes,r_dstRowBytes rlwinm r_width,r_width,32-2,2,31 subi r_src,r_src,4 subi r_width,r_width,1 @rowloop: lwzu r0,4(r_src) // load 2 pixels into r0 /* Pixel building process: r0: AABB r10: AA** AAAA AAAA**** r11: **BB BBBB ****BBBB fp0: AAAABBBB */ mr r10,r0 // put copy in r10 mr r11,r0 // and in r11 rlwimi r10,r0,16,16,31 // copy upper 16 bits to lower of r10 rlwimi r11,r0,16,0,15 // copy lower 16 bits to upper of r11 stw r10,-8(SP) // store upper 4 pixels into part of double stw r11,-4(SP) // store lower 4 pixels into part of double mtctr r_width // copy width into counter lfd fp0,-8(SP) // load double @loop: lwzu r0,4(r_src) // load 2 pixels into r10 stfdu fp0,8(r_dst) // store a double from before // (same as above) mr r10,r0 // put copy in r10 mr r11,r0 // and in r11 rlwimi r10,r0,16,16,31 // copy upper 16 bits to lower of r10 rlwimi r11,r0,16,0,15 // copy lower 16 bits to upper of r11 stw r10,-8(SP) // store upper 4 pixels into part of double stw r11,-4(SP) // store lower 4 pixels into part of double stfdu fp0,8(r_dst2) // store a double from before lfd fp0,-8(SP) // load double bdnz+ @loop // loop over all x stfdu fp0,8(r_dst) subic. r_height,r_height,1 add r_src,r_src,r_srcRowBytes add r_dst,r_dst,r_dstRowBytes stfdu fp0,8(r_dst2) // store a double from before add r_dst2,r_dst2,r_dstRowBytes bne @rowloop // loop over all y ASM_END } #pragma mark *** 680x0 asm: #elif USE_68K_ASSEMBLY ASM_FUNC void BlitPixieDoubled8Bit( unsigned char *source, unsigned char *destination, unsigned long srcBytes, unsigned long dstBytes, unsigned short width, unsigned short height ) { #define A_src A0 #define A_dst A1 #define A_dst2 A2 #define D_srcRowBytes D3 #define D_dstRowBytes D4 #define D_x D5 #define D_y D6 #define D_temp1 D0 #define D_temp2 D1 #define D_pixel D7 ASM_BEGIN MOVEM.L D3-D7/A2,-(SP) MOVEM.L source,A0-A1 MOVEM.L srcBytes,D3-D6 ANDI.W #~3,D_x MOVEA.L A_dst,A_dst2 ADDA.L D_dstRowBytes,A_dst2 SUB.L D_x,D_srcRowBytes SUB.L D_x,D_dstRowBytes ADD.L D_dstRowBytes,D_dstRowBytes /* two rows in dest per loop */ LSR.W #2,D_x /* four pixels per loop */ SWAP D_y /* cleverly use hi-word for y count (was out of registers) */ @NextRow: MOVE.W D_x,D_y /* using 'y' for both x/y counts */ @NextPixels: MOVE.L (A_src)+, D_temp1 /* get the four source pixels */ MOVE.L D_temp1,D_temp2 /* make a copy */ LSR.L #8,D_temp1 /* "space out" two … */ LSR.W #8,D_temp1 /* … adjacent pixels */ MOVE.L D_temp1,D_pixel /* copy "out spaced" pixels */ LSL.L #8,D_pixel /* shift the copy over … */ OR.L D_temp1,D_pixel /* … and recombine */ MOVE.L D_pixel,(A_dst)+ /* write the first two pixels */ MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */ SWAP D_temp2 /* do next two pixels */ LSR.L #8,D_temp2 /* repeat, as above */ LSR.W #8,D_temp2 MOVE.L D_temp2,D_pixel LSL.L #8,D_pixel OR.L D_temp2,D_pixel MOVE.L D_pixel,(A_dst)+ /* write the last two pixels */ MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */ SUBQ.W #1, D_y BNE.S @NextPixels ADDA.L D_srcRowBytes,A_src /* bump to next row */ ADDA.L D_dstRowBytes,A_dst ADDA.L D_dstRowBytes,A_dst2 SUB.L #0x00010000, D_y BNE.S @NextRow MOVEM.L (SP)+,D3-D7/A2 ASM_END } ASM_FUNC void BlitPixieDoubled16Bit( unsigned short *source, unsigned short *destination, unsigned long srcBytes, unsigned long dstBytes, unsigned short width, unsigned short height ) { #define A_src A0 #define A_dst A1 #define A_dst2 A2 #define D_srcRowBytes D3 #define D_dstRowBytes D4 #define D_x D5 #define D_y D6 #define D_temp1 D0 #define D_temp2 D1 #define D_pixel D7 ASM_BEGIN MOVEM.L D3-D7/A2,-(SP) MOVEM.L source,A0-A1 MOVEM.L srcBytes,D3-D6 ANDI.W #~3,D_x MOVEA.L A_dst,A_dst2 ADDA.L D_dstRowBytes,A_dst2 SUB.L D_x,D_srcRowBytes SUB.L D_x,D_srcRowBytes SUB.L D_x,D_dstRowBytes SUB.L D_x,D_dstRowBytes ADD.L D_dstRowBytes,D_dstRowBytes /* two rows in dest per loop */ LSR.W #1,D_x /* two pixels per loop */ SWAP D_y /* cleverly use hi-word for y count (was out of registers) */ @NextRow: MOVE.W D_x,D_y /* using 'y' for both x/y counts */ @NextPixels: MOVE.L (A_src)+, D_temp1 /* get the two source pixels */ MOVE.L D_temp1,D_temp2 /* make a copy */ SWAP D_temp1 MOVE.W D_temp1,D_pixel SWAP D_pixel MOVE.W D_temp1,D_pixel MOVE.L D_pixel,(A_dst)+ /* write the first two pixels */ MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */ SWAP D_temp2 /* do next two pixels */ MOVE.W D_temp2,D_pixel SWAP D_pixel MOVE.W D_temp2,D_pixel MOVE.L D_pixel,(A_dst)+ /* write the last two pixels */ MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */ SUBQ.W #1, D_y BNE.S @NextPixels ADDA.L D_srcRowBytes,A_src /* bump to next row */ ADDA.L D_dstRowBytes,A_dst ADDA.L D_dstRowBytes,A_dst2 SUB.L #0x00010000, D_y BNE.S @NextRow MOVEM.L (SP)+,D3-D7/A2 ASM_END } #pragma mark *** Generic C: #elif USE_GENERIC_C void BlitPixieDoubled8Bit( unsigned char *source, unsigned char *destination, unsigned long srcRowBytes, unsigned long dstRowBytes, unsigned short width, unsigned short height) { int x,y; int x2,y2; unsigned char c; for ( y = 0, y2 = 0; y < height; y++, y2 += 2 ) { for ( x = 0, x2 = 0; x < width; x++, x2 += 2 ) { c = source[ y * srcRowBytes + x ]; destination[ y2 * dstRowBytes + x2 ] = c; destination[ y2 * dstRowBytes + (x2 + 1) ] = c; destination[ (y2 + 1) * dstRowBytes + x2 ] = c; destination[ (y2 + 1) * dstRowBytes + (x2 + 1) ] = c; } } } void BlitPixieDoubled16Bit( unsigned short *source, unsigned short *destination, unsigned long srcRowBytes, unsigned long dstRowBytes, unsigned short width, unsigned short height) { int x,y; int x2,y2; unsigned short c; for ( y = 0, y2 = 0; y < height; y++, y2 += 2 ) { for ( x = 0, x2 = 0; x < width; x++, x2 += 2 ) { c = source[ y * srcRowBytes + x ]; destination[ y2 * dstRowBytes + x2 ] = c; destination[ y2 * dstRowBytes + (x2 + 1) ] = c; destination[ (y2 + 1) * dstRowBytes + x2 ] = c; destination[ (y2 + 1) * dstRowBytes + (x2 + 1) ] = c; } } } #endif #pragma mark - #ifndef GENERATINGASM // do not include for asm file generation void BlitPixieDoubled32Bit( unsigned long *source, unsigned long *destination, unsigned long srcRowBytes, unsigned long dstRowBytes, unsigned short width, unsigned short height) { int x,y; int x2,y2; unsigned long c; for ( y = 0, y2 = 0; y < height; y++, y2 += 2 ) { for ( x = 0, x2 = 0; x < width; x++, x2 += 2 ) { c = source[ y * srcRowBytes + x ]; destination[ y2 * dstRowBytes + x2 ] = c; destination[ y2 * dstRowBytes + (x2 + 1) ] = c; destination[ (y2 + 1) * dstRowBytes + x2 ] = c; destination[ (y2 + 1) * dstRowBytes + (x2 + 1) ] = c; } } } #endif